from urllib.parse import urljoin

import pandas as pd
import requests
from lxml import etree

# Static Cookie and User-Agent headers sent with the page request.
headers = {
    'cookie': '_ga=GA1.2.1033665289.1621135625; __gads=ID=e6905464a69b6e81-2259ed4ab1c80048:T=1621135624:RT=1621135624:S=ALNI_Map503A9SvNgY4pKTxmZgLoQchxcg; Hm_lvt_3eec0b7da6548cf07db3bc477ea905ee=1621502273,1621762527,1621919834,1624015495; Hm_lpvt_3eec0b7da6548cf07db3bc477ea905ee=1624015495; SERVERID=e861f758a7dc0c8cfeac76f2accad538|1624015494|1624015494; _gid=GA1.2.459096109.1624015495',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'
}

# Page to download and scan for links.
url = 'https://www.runoob.com/html/html-tutorial.html'

# Without a timeout requests waits indefinitely on an unresponsive server.
html = requests.get(url, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of parsing an error page as if it were the
# real document.
html.raise_for_status()
# Parsed element tree of the downloaded page, queried with XPath below.
pars = etree.HTML(html.text)

# One {'name': ..., 'url': ...} record per extracted link.
datas = []

# Every <a> that is a direct child of a <div> whose class is exactly "design".
lis = pars.xpath('//div[@class="design"]/a')
for n in lis:
    hrefs = n.xpath('./@href')
    if not hrefs:
        # An anchor without an href has no link to record.
        continue

    # Label is the first direct text node; an anchor with no text node gets
    # an empty name instead of raising IndexError.
    texts = n.xpath('./text()')
    name = texts[0].strip() if texts else ''

    # urljoin resolves root-relative, page-relative and absolute hrefs
    # against the page URL. Plain concatenation with the site root produced
    # a double slash for "/path" hrefs and mangled absolute ones. A separate
    # name is used so the module-level page `url` is not overwritten.
    link = urljoin(url, hrefs[0])

    datas.append({'name': name, 'url': link})

# Name the columns explicitly: a DataFrame built from an empty list has no
# columns, so the exported sheet would otherwise lose its header row when
# no links were collected.
df_datas = pd.DataFrame(datas, columns=['name', 'url'])

# Write the table to a single sheet, with a header row and without the
# DataFrame index column.
# NOTE(review): writing the legacy .xls format depends on the xlwt engine,
# which recent pandas releases no longer support -- confirm the installed
# pandas version, or switch the target to .xlsx if consumers allow it.
df_datas.to_excel('./模块6作业.xls', sheet_name='作业', header=True, index=False)